Rare species patterns and results excluding rare species for no-time models

Classifying rare species

Species classification was done for each forest plot and time interval separately. We used the living and dead individuals of the final census from the interval. We used the FuzzyQ clustering algorithm in the ‘FuzzyQ’ R package (Balbuena et al., 2021) to estimate the probability of each species to be common or rare based on species abundance and occupancy in 50x50 m quadrats.

To describe and compare rarity patterns in all forest, we estimated the number of species, number of individuals, and density of rare and common species. For the forests plots with more than 1 census interval, we averaged the estimates across census intervals.

Here, we provide the list of species code and the classification in a Rdata file.

load(here("data/rare_species_classification.Rdata"))

# plot variables including
plots_info <- read.table(here("data/plot_sites_information.csv"), sep=",", header=T)

classi <-classi %>% left_join(plots_info[,c(1,6,9,12,17:23,28)], by=c("fplot" = "ID"))
classi$fplot.time = paste0(classi$fplot,"_", classi$time)
rich = classi %>%  count(fplot.time,rarity) %>% rename(rich=n)
ntree = classi %>% group_by(fplot.time,rarity) %>% 
  summarise(ntrees = sum(n))

dat <- rich %>% left_join(ntree, by=c("fplot.time", "rarity")) %>%
  pivot_wider(names_from = rarity, values_from =c(rich, ntrees)) %>%
  mutate(p.rich_common = rich_common*100/(rich_common+rich_rare),
         p.rich_rare = rich_rare*100/(rich_common+rich_rare),
         p.ntrees_common = ntrees_common*100/(ntrees_common+ntrees_rare),
         p.ntrees_rare = ntrees_rare*100/(ntrees_common+ntrees_rare))

#Mean per forest plots with more than 1 census interval
dat.m <- dat %>% separate(fplot.time, c("fplot", "time")) %>%
  group_by(fplot) %>% 
  summarise_at(vars(rich_common:p.ntrees_rare),mean) %>%
  group_by(fplot) %>% 
  summarise_at(vars(rich_common:p.ntrees_rare),round,digits=0) %>%
  select(fplot,rich_common, p.rich_common, rich_rare, p.rich_rare,
         ntrees_common, p.ntrees_common, ntrees_rare, p.ntrees_rare)
dat.m %>%
  htmlTable(header=c("Forest" ,rep(c("N", "%"), 4)),
            cgroup = list(c("", "Species richness", "Number of trees"),
                           c("", "Common", "Rare", "Common", "rare")),
            n.cgroup = list(c(1,4,4),
                             c(1,2,2,2,2)),caption="Table S4.1. Number and percentages of species and trees classified as common or rare per forest plot. See Table S1.1 and S1.2 for forest plot names and information. For forest plots with more than 1 census interval, we average the values across intervals."
            )
Table S4.1. Number and percentages of species and trees classified as common or rare per forest plot. See Table S1.1 and S1.2 for forest plot names and information. For forest plots with more than 1 census interval, we average the values across intervals.
  Species richness   Number of trees
  Common   Rare   Common   rare
Forest   N %   N %   N %   N %
1 ama   456 35   839 65   98878 90   11578 10
2 bci   135 42   185 58   334275 96   12477 4
3 edo   122 29   298 71   168253 96   7004 4
4 fus   55 50   55 50   143813 98   2376 2
5 idc   74 53   65 47   34651 95   1675 5
6 kor   186 40   282 60   335392 94   22608 6
7 lam   539 38   869 62   376389 86   61135 14
8 ldw   14 38   23 62   28004 96   1257 4
9 len   108 29   262 71   145467 97   4296 3
10 lpl   108 45   132 55   120516 96   4735 4
11 luq   55 37   95 63   69233 96   2769 4
12 mos   96 34   182 66   157032 95   7652 5
13 pas   375 44   478 56   356955 92   31286 8
14 scbi   25 35   47 65   38366 97   1248 3
15 serc   20 27   54 73   27031 96   1204 4
16 sin   118 50   118 50   207356 94   12196 6
17 ucsc   10 32   21 68   8738 96   394 4
18 wab   11 28   28 72   49562 93   3558 7
19 wfdp   8 31   18 69   28948 97   831 3
20 wyw   5 20   20 80   19300 96   732 4
21 zof   3 23   10 77   75368 99   381 1
classi %>% group_by(rarity) %>%
  mutate(dens.ha = n/size_data) %>%
  summarise(minD = min(dens.ha),
            maxD = max(dens.ha),
            meanD = mean(dens.ha),
            sdD = sd(dens.ha), 
            medianD = median(dens.ha),
            q90D = quantile(dens.ha, 0.9),
            q95D = quantile(dens.ha, 0.95)) %>%
  kable(digits=2, caption = "Table S4.2. Summary values for the density of trees (N/ha) classified as common or rare for the 21 forest plots. SD - standard deviation, Quant - quantiles.")
Table S4.2. Summary values for the density of trees (N/ha) classified as common or rare for the 21 forest plots. SD - standard deviation, Quant - quantiles.
rarity minD maxD meanD sdD medianD q90D q95D
common 1.32 3840.80 35.24 128.01 11.26 63.74 109.80
rare 0.02 297.32 1.39 5.14 0.55 2.84 4.39
dat.m2 <- dat.m %>% 
  left_join(plots_info[,c(1,6,9,12,17:23,28)], by=c("fplot" = "ID")) %>%
  mutate(tot.rich = rich_common + rich_rare,
         tot.n = ntrees_common+ntrees_rare)
prich <- dat.m2 %>%
  pivot_longer(c(p.rich_common, p.rich_rare),names_to = "rarity",
               values_to = "richness") %>%
  ggplot(aes(x=fct_reorder(fplot, tot.rich, max),
             y=richness, fill=rarity)) +
    geom_col() +
  xlab("") + ylab("Species richness (%)") +
  labs(tag="A")+
  scale_fill_discrete("Species",
                      labels=c("common", "rare"))+
  theme(axis.text.x = element_text(angle = 45, hjust=1))

p.ntree <- dat.m2 %>%
  pivot_longer(c(p.ntrees_common, p.ntrees_rare),names_to = "rarity",
               values_to = "ntrees") %>%
  ggplot(aes(x=fct_reorder(fplot, Nsp_literature, max),
             y=ntrees, fill=rarity)) +
    geom_col()+
  labs(tag="B")+
    xlab("") + ylab("Number of trees (%)") +

  scale_fill_discrete("Species",
                      labels=c("common", "rare"))+
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        legend.position = "none")

prich/p.ntree + plot_layout(guides = "collect")
Figure S4.1. Percentages of rare and common species and number of trees per forest plot. Forest plots are arranged by absolute latitude (from the largest to the smallest). See Table S1.2 for plots names.

Figure S4.1. Percentages of rare and common species and number of trees per forest plot. Forest plots are arranged by absolute latitude (from the largest to the smallest). See Table S1.2 for plots names.

ggsave(here("figures/FIG_S4.1_class_species.jpeg"), height = 7, width = 6, unit="in")
library(wesanderson) #better colour palette
# Gradient color for latitud
pal <- wes_palette("Zissou1",21, type = "continuous")[21:1] # azul frio-temperado
dat.m2 %>% 
  ggplot(aes(x=tot.rich, y=p.rich_rare))+ 
  scale_x_log10()+
  geom_point(aes( col=abs(lat)), size=3) +
  scale_color_gradientn(name="Latitude \n (abs)",colors=pal) +
  geom_smooth(method=mgcv::gam,se=T, formula = y ~ s(x, bs = "cs")) +
  xlab("Number of species") +
  ylab("Percentage of rare species") 
Figure S4.2. Percentage of rare species against the number of species (log10 scale) for 21 worldwide distributed forest plots. Blue line and grey area are the fitted results and confidence intervals for a generalised additive model showing a decrease in the percentage of rare species with the number of species but only for forests with less than 100 species. Model's adjusted R2 = 0.30. Each forest plot is coloured by the latitude in absolute values.

Figure S4.2. Percentage of rare species against the number of species (log10 scale) for 21 worldwide distributed forest plots. Blue line and grey area are the fitted results and confidence intervals for a generalised additive model showing a decrease in the percentage of rare species with the number of species but only for forests with less than 100 species. Model’s adjusted R2 = 0.30. Each forest plot is coloured by the latitude in absolute values.

ggsave(here("figures/FIG_S4.2_propRare_nsp.jpg"), width=8, height = 5)
mod <- mgcv::gam(p.rich_rare ~ s(log(tot.rich), bs="cs"), data=dat.m2, method = "REML")
summary(mod)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## p.rich_rare ~ s(log(tot.rich), bs = "cs")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   63.810      1.632   39.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                    edf Ref.df     F p-value  
## s(log(tot.rich)) 1.803      9 0.958  0.0168 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.301   Deviance explained = 36.4%
## -REML =  72.24  Scale est. = 55.953    n = 21

No time models

Running models with different datasets:

  • all: original data, all species all individuals

  • exclude: exclude rare species

  • regroup: regrouping observations of rare species in ONE species group

Using only the 5x5 m quadrat scale

#Figures colors/labels
cores <- c("#c00000", "#b666d2", "#70ad47", "#ffc000")

labvit <- as_labeller(c(grow = "Growth", mort="Mortality", 
                        rec="Recruitment"))
grplab = as_labeller(c(quadrat = "Space", `quadrat:sp`= "Space X Species",
                       sp="Species", Residual = "residual"))
grpvit <- as_labeller(c(grow = "Growth", mort="Mortality", 
                        rec="Recruitment",
                        quadrat = "Space", `quadrat:sp`= "Species X Space",
                        sp="Species", Residual = "Residual"))

Data

Growth

grp <- c("all","exclude", "regroup")
tudo <- list()
path1 = here("models_outputs/models_notime/")
for (i in 1:length(grp)){
  path <- paste0(path1, grp[i], "/grow/table")
  files = list.files(path)
  all <-  lapply(files, function(x) get(assign(x, load(paste0(path, "/", x)))))
  names(all) <- substr(files,1,nchar(files)-6)
  tudo[[i]] <- all
}
names(tudo) <- grp
tes <- map(tudo,bind_rows, .id="id") %>% bind_rows() %>% filter(q_size=="quad_5")

#tes %>% count(data,fplot) %>% mutate(n=n/8)
table(tes$fplot,tes$data)/5
##       
##        all exclude regroup
##   ama    1       1       1
##   bci    6       6       6
##   edo    2       2       2
##   fus    3       3       3
##   idc    1       1       1
##   kor    1       1       1
##   lam    3       3       3
##   ldw    1       1       1
##   len    2       2       2
##   lpl    1       1       1
##   luq    3       3       3
##   mos    2       2       2
##   pas    4       4       4
##   scbi   1       1       1
##   serc   1       1       1
##   sin    2       2       2
##   ucsc   2       2       2
##   wab    2       2       2
##   wfdp   1       1       1
##   wyw    3       3       3
##   zof    1       1       1
#remove intercept to include again as a separate column
intercepts <- tes %>% filter(term == "intercept")
names(intercepts)[9] = "intercept"

grow <- tes %>% filter(term != "intercept") %>%
  left_join(intercepts[,c(1:4,9)], by= c("id", "data", "fplot", "time")) %>%
  group_by(id, data, fplot, time) %>% 
  mutate(VPC = variance/sum(variance))  %>% ungroup() %>%
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp", "Residual")) %>%
  as.data.frame()

# divide the sd of the terms by the mean growth (intercept) for each dataset.
grow$stdev = grow$Estimate/grow$intercept

Mort

grp <- c("all","exclude", "regroup")
tudo <- list()
path1 = here("models_outputs/models_notime/")
for (i in 1:length(grp)){
  path <- paste0(path1,grp[i], "/mort/table")
  files = list.files(path)
  all <-  lapply(files, function(x) get(assign(x, load(paste0(path, "/", x)))))
  names(all) <- substr(files,1,nchar(files)-6)
  tudo[[i]] <- all
}
names(tudo) <- grp
tes <- map(tudo,bind_rows, .id="id") %>% bind_rows() %>% filter(q_size=="quad_5")

#tes %>% count(data,fplot) %>% mutate(n=n/8)
table(tes$fplot,tes$data)/5
##       
##        all exclude regroup
##   ama    1       1       1
##   bci    7       7       7
##   edo    2       2       2
##   fus    3       3       3
##   idc    1       1       1
##   kor    1       1       1
##   lam    3       3       3
##   ldw    1       1       1
##   len    2       2       2
##   lpl    1       1       1
##   luq    3       3       3
##   mos    2       2       2
##   pas    5       5       5
##   scbi   1       1       1
##   serc   1       1       1
##   sin    2       2       2
##   ucsc   2       2       2
##   wab    2       2       2
##   wfdp   1       1       1
##   wyw    3       3       3
##   zof    1       1       1
#remove intercept to include again as a separate column
intercepts <- tes %>% filter(term == "intercept")
names(intercepts)[9] = "intercept"

mort <- tes %>% filter(term != "intercept") %>%
  left_join(intercepts[,c(1:4,9)], by= c("id", "data", "fplot", "time")) %>%
  group_by(id, data, fplot, time) %>% 
  mutate(VPC = variance/sum(variance))  %>% ungroup() %>%
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp", "Residual")) %>%
  as.data.frame()
mort$stdev = mort$Estimate

Rec

grp <- c("all","exclude", "regroup")
tudo <- list()
path1 = here("models_outputs/models_notime/")
for (i in 1:length(grp)){
  path <- paste0(path1,grp[i], "/rec/table")
  files = list.files(path)
  all <-  lapply(files, function(x) get(assign(x, load(paste0(path, "/", x)))))
  names(all) <- substr(files,1,nchar(files)-6)
  tudo[[i]] <- all
}
names(tudo) <- grp
tes <- map(tudo,bind_rows, .id="id") %>% bind_rows() %>% filter(q_size=="quad_5")

#tes %>% count(data,fplot) %>% mutate(n=n/8)
table(tes$fplot,tes$data)/5
##       
##        all exclude regroup
##   bci    7       7       7
##   edo    2       2       2
##   fus    3       3       3
##   idc    1       1       1
##   kor    1       1       1
##   lam    3       3       3
##   ldw    1       1       1
##   len    2       2       2
##   lpl    1       1       1
##   luq    3       3       3
##   mos    2       2       2
##   pas    5       5       5
##   scbi   1       1       1
##   serc   1       1       1
##   sin    2       2       2
##   ucsc   2       2       2
##   wab    2       2       2
##   wfdp   1       1       1
##   wyw    3       3       3
##   zof    1       1       1
#remove intercept to include again as a separate column
intercepts <- tes %>% filter(term == "intercept")
names(intercepts)[9] = "intercept"

rec <- tes %>% filter(term != "intercept") %>%
  left_join(intercepts[,c(1:4,9)], by= c("id", "data", "fplot", "time")) %>%
  group_by(id, data, fplot, time) %>% 
  mutate(VPC = variance/sum(variance))  %>% ungroup() %>%
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp", "Residual")) %>%
  as.data.frame()
rec$stdev = rec$Estimate

Excluding recruitment for Wytham Woods, as in “all data”.

rec <- rec %>% filter(fplot!="wyw")

Combining results.

comb <- bind_rows(list(grow=grow, mort=mort, rec=rec), .id="vital") %>%
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp","Residual")) %>%
  ungroup() 

Comparing groups

comb %>% 
  ggplot(aes(x=data, y=VPC)) +
  geom_col(aes(fill=term, color=term), alpha=0.7) +
  scale_fill_manual(values=cores) +
  scale_color_manual(values=cores) +
  facet_grid(vital~fplot+time) +
  theme(axis.text.x = element_text(angle=45, hjust=1))

mean over fplots

comb %>% group_by(vital,data,term) %>% summarise(VPC=mean(VPC)) %>%
  ggplot(aes(x=data, y=VPC)) +geom_col(aes(fill=term, color=term), alpha=0.7) +
  scale_fill_manual(values=cores) + 
  scale_color_manual(values=cores) +
  facet_grid(~vital)+
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  labs(tag="a)")

#ggsave("figs/FIG_S4.4a_groups_mean.jpg", width=20, height = 10, units = "cm")
comb %>%
  ggplot(aes(x=data,y=VPC, col=data)) + geom_boxplot()+
  facet_grid(vital~term) +
  theme(axis.text.x = element_text(angle=45,hjust=1),
        legend.position = "none")

comb %>% mutate(xis = as.numeric(as.factor(data))) %>%
  ggplot(aes(x=xis,y=stdev, col=paste(fplot,time))) + geom_line() +
  facet_grid(vital~term) + 
  scale_x_continuous(breaks = 1:3, labels=levels(comb$data),
                     name="") +
  theme(axis.text.x = element_text(angle=45, hjust=1),
        legend.position = "none")

comb %>% mutate(xis = as.numeric(as.factor(data))) %>%
  ggplot(aes(x=xis,y=VPC, col=paste(fplot,time))) + geom_line() +
  facet_grid(vital~term) + 
  scale_x_continuous(breaks = 1:3, labels=levels(comb$data),
                     name="") +
  theme(axis.text.x = element_text(angle=45, hjust=1),
        legend.position = "none")

Difference to all data

Absolute difference

dif <- comb %>% 
  pivot_wider(id_cols = c(vital,fplot,time,term),
              names_from = data,
              values_from = c(stdev,VPC, richness)) %>%
  mutate(stdev.dif_exclude = stdev_exclude - stdev_all,
         VPC.dif_exclude =  VPC_exclude - VPC_all,
         stdev.dif_regroup = stdev_regroup - stdev_all,
         VPC.dif_regroup =  VPC_regroup - VPC_all) %>%
  select(vital,fplot,time,term,richness_exclude,
         stdev.dif_exclude,
         stdev.dif_regroup, VPC.dif_exclude, VPC.dif_regroup) %>%
  pivot_longer(6:9, names_to = c("index", "data"), names_sep = "_",
                #names_pattern = "(.*_).(.*)",
                values_to =  "value") %>%
  pivot_wider(names_from = index, values_from = value)
dif %>%
  ggplot(aes(x=data, y=stdev.dif)) + 
  facet_grid(vital~term, scales= "free_x") +
  geom_boxplot() +
  geom_quasirandom(aes(col=fplot)) +
  ylab("SD no.rare - SD all") + xlab("") +
  geom_hline(yintercept = 0, linetype= "dotted")  +
  ggtitle("Difference in SD") +
  theme(legend.position = "right",
        panel.background = element_rect(colour="lightgray"),
        axis.text.x = element_text(angle=45, hjust=1))

dif %>%
  ggplot(aes(x=data, y=VPC.dif)) + 
  facet_grid(vital~term, scales= "free_x") +
  geom_boxplot() +
  geom_quasirandom(aes(col=fplot)) +
  ylab("VPC no.rare - VPC all") + xlab("") +
  geom_hline(yintercept = 0, linetype= "dotted")  +
  ggtitle("Difference in VPC") +
  theme(legend.position = "right",
        panel.background = element_rect(colour="lightgray"),
        axis.text.x = element_text(angle=45, hjust=1))

Relative difference SD

difr <- comb %>% 
  pivot_wider(id_cols = c(vital,fplot,time,term),
              names_from = data,
              values_from = c(stdev,VPC, richness)) %>%
  mutate(stdev.dif_exclude = (stdev_exclude - stdev_all)*100/stdev_all,
         stdev.dif_regroup = (stdev_regroup - stdev_all)*100/stdev_all) %>%
  select(vital,fplot,time,term,richness_exclude,
         stdev.dif_exclude,stdev.dif_regroup) %>%
  pivot_longer(6:7, names_to = "data",
                values_to =  "stdev.dif") %>%
  mutate(data = substr(data,11, nchar(data)))
difr %>%
  ggplot(aes(x=data, y=stdev.dif)) + 
  facet_grid(vital~term, scales= "free_x") +
  geom_boxplot() +
  geom_quasirandom(aes(col=fplot)) +
  ylab("Proportional difference (%)") + xlab("") +
  geom_hline(yintercept = 0, linetype= "dotted")  +
  ggtitle("Relative difference in SD") +
  theme(legend.position = "right",
        panel.background = element_rect(colour="lightgray"),
        axis.text.x = element_text(angle=45, hjust=1))

difr %>% group_by(vital, term, data) %>% summarise(mean=mean(stdev.dif, na.rm=T)) %>%
  pivot_wider(names_from = term, values_from = mean)
## # A tibble: 6 × 6
## # Groups:   vital [3]
##   vital data    quadrat `quadrat:sp`     sp Residual
##   <chr> <chr>     <dbl>        <dbl>  <dbl>    <dbl>
## 1 grow  exclude 11.0           8.47  -11.8      13.2
## 2 grow  regroup  8.71          3.97  -12.7      13.9
## 3 mort  exclude  0.0278       -1.03  -14.9       0  
## 4 mort  regroup -2.62         -0.591 -16.4       0  
## 5 rec   exclude -0.565        -0.299  -9.26      0  
## 6 rec   regroup  0.512        -0.768 -10.4       0

table

Relative differences in SD

difr %>% group_by(vital, term, data) %>% 
  summarise(mean=mean(stdev.dif, na.rm=T)) %>%
  pivot_wider(names_from = c(vital,data), values_from = mean) %>%
  kable(digits=1)
term grow_exclude grow_regroup mort_exclude mort_regroup rec_exclude rec_regroup
quadrat 11.0 8.7 0.0 -2.6 -0.6 0.5
quadrat:sp 8.5 4.0 -1.0 -0.6 -0.3 -0.8
sp -11.8 -12.7 -14.9 -16.4 -9.3 -10.4
Residual 13.2 13.9 0.0 0.0 0.0 0.0

Mean absolute differences in VPC

dif %>% group_by(vital, term, data) %>% 
  summarise(mean=mean(VPC.dif, na.rm=T)) %>%
  pivot_wider(names_from = c(vital,data), values_from = mean) -> write
kable(write)
term grow_exclude grow_regroup mort_exclude mort_regroup rec_exclude rec_regroup
quadrat 0.0048918 0.0039062 0.0081885 0.0061325 0.0069821 0.0130636
quadrat:sp 0.0079957 0.0028262 0.0139204 0.0162427 0.0076814 0.0044177
sp -0.0831334 -0.0908756 -0.0704275 -0.0754252 -0.0321646 -0.0368164
Residual 0.0702459 0.0841432 0.0483186 0.0530499 0.0175012 0.0193351

Comparing with NUMBER of rare species

dif %>%
  ggplot(aes(x=richness_exclude, y=stdev.dif, shape=data)) + 
  geom_point(aes(col=fplot)) + 
  geom_smooth(method="lm", se=F, aes(linetype=data)) +
  facet_grid(vital~term) +
  ylab("Difference in SD to original data") +
  xlab("log N of rare species") +
  geom_hline(yintercept = 0, linetype= "dotted")  +
  ggtitle("Difference in SD") +
  scale_x_log10()+
  theme(axis.text.x = element_text(angle=45, hjust=1),
        )

difr %>%
  ggplot(aes(x=richness_exclude, y=stdev.dif, shape=data)) + 
  geom_point(aes(col=fplot)) + 
  geom_smooth(method="lm", se=F, aes(linetype=data)) +
  facet_grid(vital~term) +
  ylab("Relative difference in SD to original data") +
  xlab("log N of rare species") +
  geom_hline(yintercept = 0, linetype= "dotted")  +
  ggtitle("Proportional difference in SD to original data") +
  theme(axis.text.x = element_text(angle=45, hjust=1),
        panel.background = element_rect(fill="white", color="black"))

pm <- dif %>%
  ggplot(aes(x=richness_exclude, y=VPC.dif, shape=data)) + 
  geom_point(aes(col=fplot)) + 
  facet_grid(vital~term) +
   geom_smooth(method="lm", se=F, aes(linetype=data)) +
  ylab(" Absolute difference in VPC to original data") +
   xlab("log N of rare species") +
  geom_hline(yintercept = 0, linetype= "dotted")  +
  theme(axis.text.x = element_text(angle=45, hjust=1),
        panel.background = element_rect(fill="white", color="black"))
pm

Dirichlet regression for exclude results with species richness

Richness by rarefaction at the 6ha plot size (sampling unit 20x20m)

load(here("data", "rarefaction.curves.Rdata"))
rich.rare <- rare[rare$sites ==150,2:3]
colnames(rich.rare)[1] <- "richness.rarefaction"
comb <- comb %>% left_join(rich.rare, by="fplot")

Latitude

load(here("data", "plots_structure.Rdata"))
comb <- comb %>% left_join(plots.structure[,1:2], "fplot")
#mean for each forest
mcomb <- comb %>% group_by(vital,data, fplot,term, lat) %>%
  summarise(VPC = mean(VPC),
            stdev = mean(stdev),
            richness.rarefaction = mean(richness.rarefaction))

GROWTH

predirig <-  mcomb %>% 
  filter(vital == "grow", data == "exclude") %>%
  select(fplot, term, VPC, richness.rarefaction,lat) 
diridatag <- predirig %>%
  pivot_wider(names_from = term, values_from = VPC) %>% ungroup() %>%
  mutate(log.rich = log(richness.rarefaction),
         log.rich.o=log.rich) %>%
  mutate_at(vars( log.rich), scale)

vpcg <- DR_data(diridatag[,c("quadrat", "sp", "quadrat:sp",  "Residual")])
#plot(vpc)
grow2 <- DirichReg(vpcg~log.rich, data=diridatag, model="alternative", base=4)
summary(grow2)
## Call:
## DirichReg(formula = vpcg ~ log.rich, data = diridatag, model = "alternative",
## base = 4)
## 
## Standardized Residuals:
##                 Min       1Q   Median       3Q     Max
## quadrat     -0.9058  -0.4992  -0.2701  -0.0166  0.9614
## sp          -1.5134  -0.7438  -0.3542   0.2168  3.7107
## quadrat:sp  -1.7533  -0.5644  -0.2376   0.3694  2.6473
## Residual    -2.0096  -0.8659   0.3287   1.1003  1.8630
## 
## MEAN MODELS:
## ------------------------------------------------------------------
## Coefficients for variable no. 1: quadrat
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.6205     0.1744 -15.026  < 2e-16 ***
## log.rich      0.4740     0.1675   2.829  0.00466 ** 
## ------------------------------------------------------------------
## Coefficients for variable no. 2: sp
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.09746    0.09944 -11.036   <2e-16 ***
## log.rich    -0.20184    0.10053  -2.008   0.0447 *  
## ------------------------------------------------------------------
## Coefficients for variable no. 3: quadrat:sp
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.44533    0.11231 -12.869   <2e-16 ***
## log.rich    -0.08056    0.11309  -0.712    0.476    
## ------------------------------------------------------------------
## Coefficients for variable no. 4: Residual
## - variable omitted (reference category) -
## ------------------------------------------------------------------
## 
## PRECISION MODEL:
## ------------------------------------------------------------------
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.4105     0.1789   19.06   <2e-16 ***
## ------------------------------------------------------------------
## Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-likelihood: 102.9 on 7 df (38 BFGS + 1 NR Iterations)
## AIC: -191.7, BIC: -184.4
## Number of Observations: 21
## Links: Logit (Means) and Log (Precision)
## Parametrization: alternative
newdata <- expand.grid(log.rich = seq(min(diridatag$log.rich),
                                      max(diridatag$log.rich), length.out=10))
pred <- predict(grow2, newdata = newdata, se=T)
confint(grow2)
## 
## 95% Confidence Intervals (original form)
## 
## - Beta-Parameters:
## Variable: quadrat
##                2.5%    Est.   97.5%
## (Intercept)  -2.962  -2.621  -2.279
## log.rich      0.146   0.474   0.802
## 
## Variable: sp
##                2.5%    Est.   97.5%
## (Intercept)  -1.292  -1.097  -0.903
## log.rich     -0.399  -0.202  -0.005
## 
## Variable: quadrat:sp
##                2.5%    Est.   97.5%
## (Intercept)  -1.665  -1.445  -1.225
## log.rich     -0.302  -0.081   0.141
## 
## Variable: Residual
##   variable omitted
## 
## - Gamma-Parameters
##              2.5%  Est.  97.5%
## (Intercept)  3.06  3.41   3.76
colnames(pred) <- colnames(vpcg)
newdata$log.rich.o <- newdata$log.rich*sd(diridatag$log.rich.o) +
  mean(diridatag$log.rich.o)
newdata$rich.o <- exp(newdata$log.rich.o)
newpredg <- cbind(newdata,pred) %>% pivot_longer(`quadrat`:`Residual`, names_to="term", values_to="pred")

newpredg %>% mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp",
                                       "Residual")) %>%
  ggplot(aes(x=rich.o, y=pred, col=term)) +
  geom_line() + facet_grid(~term) +
  geom_point(data=predirig, aes(x=richness.rarefaction, y=VPC, col=term)) +
  scale_x_log10() +
  ggtitle("Mod log.rich")

Residuals

resid <- residuals(grow2, type = "standardized")
resid <-  data.frame(resid[,1:4])
resid$log.rich <- diridatag$log.rich
resid <- resid %>% pivot_longer(1:4, names_to="term", values_to="resid")

ggplot(resid, aes(x=log.rich, y=resid)) + geom_point() +
  facet_grid(~term)

MORTALITY

predirim <-  mcomb %>% 
  filter(vital == "mort", data == "exclude") %>%
  select(fplot, term, VPC, richness.rarefaction,lat) 
diridatam <- predirim %>%
  pivot_wider(names_from = term, values_from = VPC) %>% ungroup() %>%
  mutate(log.rich = log(richness.rarefaction),
         log.rich.o=log.rich) %>%
  mutate_at(vars( log.rich), scale)

vpcm <- DR_data(diridatam[,c("quadrat", "sp", "quadrat:sp",  "Residual")])
#plot(vpc)
mort2 <- DirichReg(vpcm~log.rich,data=diridatam, model="alternative", base=4)
summary(mort2)
## Call:
## DirichReg(formula = vpcm ~ log.rich, data = diridatam, model = "alternative",
## base = 4)
## 
## Standardized Residuals:
##                 Min       1Q   Median      3Q     Max
## quadrat     -1.2846  -0.6061  -0.0454  0.4078  1.8812
## sp          -1.9682  -0.8456   0.1570  0.9680  2.6456
## quadrat:sp  -1.8027  -0.6593  -0.0102  0.2449  1.6414
## Residual    -2.9957  -0.2679   0.0598  0.6229  1.6857
## 
## MEAN MODELS:
## ------------------------------------------------------------------
## Coefficients for variable no. 1: quadrat
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.6901     0.1375 -12.295  < 2e-16 ***
## log.rich      0.4103     0.1473   2.786  0.00534 ** 
## ------------------------------------------------------------------
## Coefficients for variable no. 2: sp
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.87149    0.10008  -8.708   <2e-16 ***
## log.rich    -0.14106    0.09865  -1.430    0.153    
## ------------------------------------------------------------------
## Coefficients for variable no. 3: quadrat:sp
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2178     0.1135  -10.73   <2e-16 ***
## log.rich     -0.2471     0.1217   -2.03   0.0424 *  
## ------------------------------------------------------------------
## Coefficients for variable no. 4: Residual
## - variable omitted (reference category) -
## ------------------------------------------------------------------
## 
## PRECISION MODEL:
## ------------------------------------------------------------------
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.3788     0.1756   19.24   <2e-16 ***
## ------------------------------------------------------------------
## Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-likelihood: 88.64 on 7 df (43 BFGS + 1 NR Iterations)
## AIC: -163.3, BIC: -156
## Number of Observations: 21
## Links: Logit (Means) and Log (Precision)
## Parametrization: alternative
newdata <- expand.grid(log.rich = seq(min(diridatam$log.rich),
                                      max(diridatam$log.rich), length.out=10))
pred <- predict(mort2, newdata = newdata, se=T)
confint(mort2)
## 
## 95% Confidence Intervals (original form)
## 
## - Beta-Parameters:
## Variable: quadrat
##                2.5%   Est.   97.5%
## (Intercept)  -1.960  -1.69  -1.421
## log.rich      0.122   0.41   0.699
## 
## Variable: sp
##                2.5%    Est.   97.5%
## (Intercept)  -1.068  -0.871  -0.675
## log.rich     -0.334  -0.141   0.052
## 
## Variable: quadrat:sp
##                2.5%    Est.   97.5%
## (Intercept)  -1.440  -1.218  -0.995
## log.rich     -0.486  -0.247  -0.009
## 
## Variable: Residual
##   variable omitted
## 
## - Gamma-Parameters
##              2.5%  Est.  97.5%
## (Intercept)  3.04  3.38   3.72
colnames(pred) <- colnames(vpcm)
newdata$log.rich.o <- newdata$log.rich*sd(diridatam$log.rich.o) +
  mean(diridatam$log.rich.o)
newdata$rich.o <- exp(newdata$log.rich.o)
newpredm <- cbind(newdata,pred) %>% pivot_longer(`quadrat`:`Residual`, names_to="term", values_to="pred")

newpredm %>% mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp",
                                       "Residual")) %>%
  ggplot(aes(x=rich.o, y=pred, col=term)) +
  geom_line() + facet_grid(~term) +
  geom_point(data=predirim, aes(x=richness.rarefaction, y=VPC, col=term)) +
  scale_x_log10() +
  ggtitle("Mod log.rich")

Residuals

resid <- residuals(mort2, type = "standardized")
resid <-  data.frame(resid[,1:4])
resid$log.rich <- diridatam$log.rich
resid <- resid %>% pivot_longer(1:4, names_to="term", values_to="resid")

ggplot(resid, aes(x=log.rich, y=resid)) + geom_point() +
  facet_grid(~term)

RECRUITMENT

predirir <-  mcomb %>% 
  filter(vital == "rec", data == "exclude") %>%
  select(fplot, term, VPC, richness.rarefaction,lat) 
diridatar <- predirir %>%
  pivot_wider(names_from = term, values_from = VPC) %>% ungroup() %>%
  mutate(log.rich = log(richness.rarefaction),
         log.rich.o=log.rich) %>%
  mutate_at(vars( log.rich), scale)

vpcr <- DR_data(diridatar[,c("quadrat", "sp", "quadrat:sp",  "Residual")])
#plot(vpc)
rec2 <- DirichReg(vpcr~log.rich,data=diridatar, model="alternative", base=4)
summary(rec2)
## Call:
## DirichReg(formula = vpcr ~ log.rich, data = diridatar, model = "alternative",
## base = 4)
## 
## Standardized Residuals:
##                 Min       1Q   Median      3Q     Max
## quadrat     -1.5124  -0.6320  -0.2358  0.8198  2.1508
## sp          -2.8666  -0.2882   0.1438  0.7867  1.6337
## quadrat:sp  -1.4254  -0.6122  -0.2100  0.3186  2.8831
## Residual    -1.5080  -0.6407  -0.0523  0.5100  3.1244
## 
## MEAN MODELS:
## ------------------------------------------------------------------
## Coefficients for variable no. 1: quadrat
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.586194   0.129600  -4.523 6.09e-06 ***
## log.rich     0.005999   0.140757   0.043    0.966    
## ------------------------------------------------------------------
## Coefficients for variable no. 2: sp
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.08768    0.11441  -0.766    0.443    
## log.rich    -0.71600    0.11995  -5.969 2.38e-09 ***
## ------------------------------------------------------------------
## Coefficients for variable no. 3: quadrat:sp
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.70344    0.13331  -5.277 1.32e-07 ***
## log.rich    -0.07571    0.14027  -0.540    0.589    
## ------------------------------------------------------------------
## Coefficients for variable no. 4: Residual
## - variable omitted (reference category) -
## ------------------------------------------------------------------
## 
## PRECISION MODEL:
## ------------------------------------------------------------------
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.2418     0.1825   17.77   <2e-16 ***
## ------------------------------------------------------------------
## Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-likelihood: 71.27 on 7 df (43 BFGS + 1 NR Iterations)
## AIC: -128.5, BIC: -121.9
## Number of Observations: 19
## Links: Logit (Means) and Log (Precision)
## Parametrization: alternative
newdata <- expand.grid(log.rich = seq(min(diridatar$log.rich),
                                      max(diridatar$log.rich), length.out=10))
pred <- predict(rec2, newdata = newdata, se=T)
colnames(pred) <- colnames(vpcr)
newdata$log.rich.o <- newdata$log.rich*sd(diridatar$log.rich.o) +
  mean(diridatar$log.rich.o)
newdata$rich.o <- exp(newdata$log.rich.o)
newpredr <- cbind(newdata,pred) %>% pivot_longer(`quadrat`:`Residual`, names_to="term", values_to="pred")

newpredr %>% mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp",
                                       "Residual")) %>%
  ggplot(aes(x=rich.o, y=pred, col=term)) +
  geom_line() + facet_grid(~term) +
  geom_point(data=predirir, aes(x=richness.rarefaction, y=VPC, col=term)) +
  scale_x_log10() +
  ggtitle("Mod log.rich")

Residuals

resid <- residuals(rec2, type = "standardized")
resid <-  data.frame(resid[,1:4])
resid$log.rich <- diridatar$log.rich
resid <- resid %>% pivot_longer(1:4, names_to="term", values_to="resid")

ggplot(resid, aes(x=log.rich, y=resid)) + geom_point() +
  facet_grid(~term)

FIGURE

Calculated prediction interval

source(here("scripts/prediction_intervals_dirichlet_exclude.R"), local=T)
#load("results/prediction_intervals_dirichlet_exclude.Rdata") #  quants
predis <- bind_rows(list(grow=newpredg, mort=newpredm, rec=newpredr),
                    .id="vital") %>% 
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp","Residual"))

pontos <- bind_rows(list(grow=predirig, mort= predirim, rec=predirir), .id="vital")%>% 
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp", "Residual"))
quants <- bind_rows(list(grow=quantg,mort=quantm, rec=quantr), .id="vital")%>% 
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp", "Residual"))
# pvalues
res <- data.frame(vital = c("grow", "mort", "rec"),
                  P = NA,
                  term = "Residual")

test <- summary(grow2)
vals <- as.data.frame(test$coef.mat)[1:6,]
vals$coef <- names(test$coefficients)[1:6]
valsg <- vals[ grep("log.rich", vals$coef),] %>% 
  mutate(term=c("quadrat", "sp", "quadrat:sp"))
test <- summary(mort2)
vals <- as.data.frame(test$coef.mat)[1:6,]
vals$coef <- names(test$coefficients)[1:6]
valsm <- vals[ grep("log.rich", vals$coef),] %>% 
  mutate(term=c("quadrat", "sp", "quadrat:sp"))
test <- summary(rec2)
vals <- as.data.frame(test$coef.mat)[1:6,]
vals$coef <- names(test$coefficients)[1:6]
valsr <- vals[ grep("log.rich", vals$coef),] %>% 
  mutate(term=c("quadrat", "sp", "quadrat:sp")) 

pvals <- bind_rows(list(grow=valsg,mort=valsm,rec=valsr), .id="vital") %>%
  dplyr::select(vital,`Pr(>|z|)`, term) %>% rename(P = `Pr(>|z|)`)
pvals <- rbind(pvals,res) %>% 
  mutate(term = fct_relevel(term, "quadrat", "quadrat:sp", "sp", "Residual"))

pvals$x <- 300
pvals$y <- 0.70
pvals$sig <- paste0("p = ", round(pvals$P,3))
#pvals$sig[pvals$P >0.0166] <- "ns"
pvals$sig[pvals$term=="Residual"] <- ""
pvals$sig[pvals$sig=="p = 0"] <- "p < 0.0001"
library(wesanderson) #better colour palette
# Gradient color para latitutde
pal <- wes_palette("Zissou1",20, type = "continuous")[20:1] # azul frio-temperado
fdiri_lat <-ggplot(predis, aes(x=rich.o, y=pred)) +
  geom_line(size=1) +
  facet_grid(vital~term, labeller=grpvit) +
  geom_smooth(data=quants,aes(x=rich.o, y=lower), se=F, size=0.1)+
  geom_smooth(data=quants,aes(x=rich.o, y=upper), se=F, size=0.1)  +
  geom_ribbon(data=quants,aes(x=rich.o, ymin=lower, ymax=upper,
                              y=mean), alpha=0.05, fill="blue",
              size=0)+
  geom_point(data=pontos, aes(x=richness.rarefaction, y=VPC, fill=abs(lat)),
             col="black", size=2.5, pch=21)+
  scale_fill_gradientn(colors=pal) +
  scale_y_continuous(minor_breaks = NULL) +
  scale_x_log10() +
  geom_text(data=pvals, aes(x=x,y=y, label=sig), size=3, hjust=1,vjust=0,
            col="black")+
  
  xlab("Species richness (log10 scale)")+
  ylab("VPC") +
  theme_cowplot() +
  theme(panel.background = element_rect(colour="lightgray"),
        legend.position = "none")
fdiri_lat

png(here("figures/FIG_S5.5_dirichlet_regressions_lat_exclude.png"), height = 600, width=800, res=100)
fdiri_lat
dev.off()
## quartz_off_screen 
##                 2